import pandas as pd
from sklearn.decomposition import PCA
import time

if __name__ == "__main__":
    # Row boundaries of the split: rows [0, train_num) are used for training
    # and rows [train_num, test_num) for testing. Note that test_num is the
    # exclusive end row of the test slice, not the number of test rows.
    train_num = 5000
    test_num = 7000

    data = pd.read_csv('train.csv')
    table = data.values

    # Column 0 is used as the label; every remaining column is a feature.
    labels, features = table[:, 0], table[:, 1:]
    train_data, train_label = features[:train_num], labels[:train_num]
    test_data, test_label = (
        features[train_num:test_num],
        labels[train_num:test_num],
    )
    print(train_data.shape)

    # Timestamp taken right before the PCA step starts.
    t = time.time()

    # PCA dimensionality reduction. Per the scikit-learn documentation, a
    # fractional n_components keeps as many components as are needed to
    # explain that share of the variance (80% here); whiten=True rescales
    # the projected components.
    print('start pca...')
    pca = PCA(n_components=0.8, whiten=True)

    # The projection is fitted on the training rows only and then reused
    # unchanged for the test rows.
    train_x = pca.fit_transform(train_data)
    test_x = pca.transform(test_data)
    print(train_x.shape)
    print(train_x)

    # Share of the total variance explained by each retained principal
    # component; the larger the ratio, the more important the component.
    print(pca.explained_variance_ratio_)